You may work in a small group on the project.
Your code and analysis should be delivered in an IPython Notebook by end of day Monday 10/14.
We have decided to build on the flight data from OpenFlights.org that we used in our first project.
We already have the Routes data set loaded. We will use Airports as our second node type.
First import necessary packages for plotting graphs using NetworkX and Matplotlib and set up graph size parameters...
import networkx as nx
from networkx.algorithms import bipartite as bi
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["figure.figsize"] = (15,12)
Read in Routes data to a Pandas dataframe.
# Load two columns of the routes file and label them 'Airline' and 'Airport'.
# NOTE(review): in the published OpenFlights routes.dat layout, columns 2 and 4
# appear to be the source and destination airport codes rather than an airline
# code -- confirm that the 'Airline' label is what is intended here.
routes_raw = pd.read_csv(
    "Project1_Data/routes.dat",
    sep=",",
    header=None,
    usecols=[2, 4],
    names=['Airline', 'Airport'],
).dropna()  # rows with a missing value in either column are discarded

# Also discard rows whose Airport field is the literal "\N" placeholder.
routes_raw = routes_raw.loc[routes_raw['Airport'] != "\\N"]
routes_raw.head()
# Work on a copy so that routes_raw itself is left unchanged.
aa = routes_raw.copy()

# Every row receives the number of (non-null) 'Airport' entries found in its
# 'Airline' group; this value is used as the edge weight later on.
routes_per_group = aa.groupby('Airline')['Airport'].transform('count')
aa['weight'] = routes_per_group
aa.head(10)

# The 'Airline_' / 'Airport_' name prefixing is disabled, so the codes are
# used as node names exactly as they appear in the data.
aa.head(10)
len(aa)
# Load the airport reference table, keeping seven of its columns.
airports_raw = pd.read_csv(
    "Project1_Data/airports.dat",
    sep=",",
    header=None,
    usecols=[0, 1, 2, 3, 4, 6, 7],
    names=[
        'Airport',
        'Airport Name',
        'City',
        'Country',
        'IATA',
        'Latitude',
        'Longitude',
    ],
)
airports_raw.head()
len(airports_raw)
# Re-key the airport table by IATA code so it lines up with the 'Airport'
# column of the routes table.
airports = airports_raw.copy()
airports['Airport'] = airports['IATA']

# Inner join: only routes whose 'Airport' code exists in the airport table survive.
aa_merged = aa.merge(
    airports,
    how='inner',
    left_on='Airport',
    right_on='Airport',
)
aa_merged.head(20)

# One row of descriptive attributes per distinct airport that appears in the
# merged routes.
airport_nodes = aa_merged.loc[
    :,
    ['Airport', 'Airport Name', 'City', 'Country', 'IATA', 'Latitude', 'Longitude'],
].drop_duplicates()
airport_nodes.head()
...
# Assemble the two-mode graph: 'Airline' values are tagged bipartite=0 and
# 'Airport' values bipartite=1.
# NOTE(review): a value present in both columns is presumably left with the
# later bipartite=1 tag, since the second call updates existing nodes -- confirm.
B = nx.Graph()
B.add_nodes_from(aa_merged['Airline'], bipartite=0)
B.add_nodes_from(aa_merged['Airport'], bipartite=1)

# One weighted edge per merged row: (Airline, Airport, weight).
edge_rows = aa_merged[['Airline', 'Airport', 'weight']].values
B.add_weighted_edges_from(map(tuple, edge_rows))

# Attach the descriptive airport columns to the matching nodes.
airport_attributes = airport_nodes.set_index('Airport').to_dict('index')
nx.set_node_attributes(B, airport_attributes)
Let's double check if our attributes were saved in the graph data.
# Peek at the first and last few nodes together with their stored attributes.
list(B.nodes(data=True))[:3]
list(B.nodes(data=True))[-2:]

# Node names carry no 'Airline_' / 'Airport_' prefix (the prefixing code is
# commented out), so the edge has to be looked up with the bare codes; the
# prefixed names never exist in B and the lookup would just return None.
B.get_edge_data('AER', 'KZN')
print(nx.info(B))

# Split the node set by the 'bipartite' tag stored on each node.
airline_nodes = {n for n, d in B.nodes(data=True) if d['bipartite'] == 0}
airport_nodes = set(B) - airline_nodes

nx.is_connected(B)
bi.is_bipartite(B)

# Bipartite density, computed once from each side of the node split.
print(bi.density(B, airline_nodes))
print(bi.density(B, airport_nodes))
'''
# textbook function doesn't work...
def trim_edges(g, weight=1):
g2=nx.Graph()
for f, to, edata in g.edges(data=True):
if edata['weight'] > weight:
g2.add_edge(f, to, edata)
return g2
'''
def trim_edges(g, weight=1):
    """Return a new graph holding only the edges of *g* heavier than *weight*.

    Args:
        g: Graph whose edges all carry a 'weight' attribute.
        weight: Threshold; an edge is kept only when its weight is strictly
            greater than this value.

    Returns:
        A new ``nx.Graph`` containing the kept edges (with their edge data
        copied, so 'weight' is preserved) and the endpoints of those edges.
        Each node carries the attributes it has in *g*.  A node that has no
        'bipartite' attribute in *g* falls back to the positional tagging:
        1 if it appears as the second endpoint of any kept edge, else 0.

    Raises:
        KeyError: if an edge of *g* has no 'weight' attribute.
    """
    kept = [
        (u, v, edata)
        for u, v, edata in g.edges(data=True)
        if edata['weight'] > weight
    ]

    g2 = nx.Graph()
    # Edge data is passed as the third tuple element.  Passing it through an
    # ``attr_dict=`` keyword stores an attribute literally named 'attr_dict'
    # in networkx 2.x, and keying it by the numeric threshold loses 'weight'.
    g2.add_edges_from(kept)

    # Positional fallback tags, applied first so real attributes can override.
    g2.add_nodes_from((u for u, _, _ in kept), bipartite=0)
    g2.add_nodes_from((v for _, v, _ in kept), bipartite=1)

    # Endpoint order of an undirected edge is arbitrary, so the authoritative
    # node attributes (including 'bipartite') are taken from the source graph.
    for node in g2:
        g2.nodes[node].update(g.nodes[node])
    return g2
def island_method(g, iterations=5):
    """Trim *g* at a series of rising weight thresholds.

    Args:
        g: Graph whose edges all carry a 'weight' attribute.
        iterations: Roughly how many threshold levels to produce between the
            smallest and the largest edge weight.

    Returns:
        A list of ``[threshold, trimmed_graph]`` pairs, one per threshold in
        ``range(min_weight, max_weight, step)``, where each graph comes from
        ``trim_edges(g, threshold)``.  The list is empty when *g* has no
        edges or when all edge weights are equal.
    """
    weights = [edata['weight'] for _, _, edata in g.edges(data=True)]
    if not weights:
        # Nothing to trim; min()/max() would raise on an empty list.
        return []

    mn = int(min(weights))
    mx = int(max(weights))
    # Compute the step so we get a reasonable number of levels.  When the
    # weight span is smaller than `iterations` the division truncates to 0,
    # which range() rejects, so fall back to a step of 1.
    step = int((mx - mn) / iterations) or 1
    return [
        [threshold, trim_edges(g, threshold)]
        for threshold in range(mn, mx, step)
    ]
island = island_method(B)
for threshold, trimmed in island:
    # print the threshold level, size of the graph, and number of connected components
    print(threshold, len(trimmed), nx.number_connected_components(trimmed))

# Largest connected component of the lowest-threshold island.
# nx.connected_component_subgraphs is no longer available in recent networkx
# releases, so the component is selected by node set and copied out instead.
_first_island = island[0][1]
G0 = _first_island.subgraph(
    max(nx.connected_components(_first_island), key=len)
).copy()
def _island_node_colors(graph):
    """Return one colour per node: airports (bipartite == 1) blue, the rest pink."""
    return [
        'cornflowerblue' if data['bipartite'] == 1 else 'orchid'
        for _, data in graph.nodes(data=True)
    ]


def _draw_island(graph):
    """Draw *graph* on a fresh figure and return the node colours that were used."""
    node_colors = _island_node_colors(graph)
    # Start a new figure so consecutive drawings do not pile onto the same axes.
    plt.figure()
    nx.draw(graph,
            with_labels=True,
            node_color=node_colors,
            node_size=200,
            font_size=10,
            font_weight='bold',
            edge_color="skyblue",
            alpha=0.5)
    return node_colors


# Largest component of the lowest-threshold island.
colors = _draw_island(G0)

# The next three threshold levels are drawn in full.
G1 = island[1][1]
colors = _draw_island(G1)

G2 = island[2][1]
colors = _draw_island(G2)

G3 = island[3][1]
colors = _draw_island(G3)
# compute an affiliation network of the Airports
airports = bi.weighted_projected_graph(B, airport_nodes)

# Find the largest connected subgraph in the network.
# nx.connected_component_subgraphs is no longer available in recent networkx
# releases, so the component is selected by node set and copied out instead.
airports_subgraph = airports.subgraph(
    max(nx.connected_components(airports), key=len)
).copy()
airports_subgraph.name = "Airports"
print(nx.info(airports_subgraph))
nx.is_connected(airports_subgraph)
bi.is_bipartite(airports_subgraph)

airport_islands = island_method(airports_subgraph)
for threshold, trimmed in airport_islands:
    # print the threshold level, size of the graph, and number of connected components
    print(threshold, len(trimmed), nx.number_connected_components(trimmed))

# Largest connected component of the lowest-threshold airport island.
_first_airport_island = airport_islands[0][1]
API = _first_airport_island.subgraph(
    max(nx.connected_components(_first_airport_island), key=len)
).copy()

# set plot size
plt.rcParams["figure.figsize"] = (15, 15)
nx.draw(API,
        with_labels=True,
        node_color='cornflowerblue',
        node_size=200,
        font_size=10,
        font_weight='bold',
        edge_color="cornflowerblue",
        alpha=0.5)
# compute an affiliation network of the Airlines
airlines = bi.weighted_projected_graph(B, airline_nodes)

# Find the largest connected subgraph in the network.
# nx.connected_component_subgraphs is no longer available in recent networkx
# releases, so the component is selected by node set and copied out instead.
airlines_subgraph = airlines.subgraph(
    max(nx.connected_components(airlines), key=len)
).copy()
airlines_subgraph.name = "Airlines"
print(nx.info(airlines_subgraph))
nx.is_connected(airlines_subgraph)
bi.is_bipartite(airlines_subgraph)

airline_islands = island_method(airlines_subgraph)
for threshold, trimmed in airline_islands:
    # print the threshold level, size of the graph, and number of connected components
    print(threshold, len(trimmed), nx.number_connected_components(trimmed))

# Largest connected component of the lowest-threshold airline island.
_first_airline_island = airline_islands[0][1]
ALI = _first_airline_island.subgraph(
    max(nx.connected_components(_first_airline_island), key=len)
).copy()

# set plot size
plt.rcParams["figure.figsize"] = (15, 15)
nx.draw(ALI,
        with_labels=True,
        node_color='orchid',
        node_size=200,
        font_size=10,
        font_weight='bold',
        edge_color="orchid",
        alpha=0.5)